46a56a74db2a423557584c3ed1c2027299204326,src/main/java/org/apache/hadoop/hbase/mapreduce/hadoopbackport/InputSampler.java,IntervalSampler,getSample,#InputFormat#Job#,276

Before Change


      long records = 0;
      long kept = 0;
      for (int i = 0; i < splitsToSample; ++i) {
        RecordReader<K,V> reader = inf.createRecordReader(
          splits.get(i * splitStep),
          new TaskAttemptContext(job.getConfiguration(), 
                                 new TaskAttemptID()));
        while (reader.nextKeyValue()) {
          ++records;
          if ((double) kept / records < freq) {
            ++kept;
            samples.add(reader.getCurrentKey());
          }
        }
        reader.close();

After Change


      long records = 0;
      long kept = 0;
      for (int i = 0; i < splitsToSample; ++i) {
        TaskAttemptContext samplingContext = new TaskAttemptContext(
            job.getConfiguration(), new TaskAttemptID());
        RecordReader<K,V> reader = inf.createRecordReader(
            splits.get(i), samplingContext);
        reader.initialize(splits.get(i), samplingContext);
        while (reader.nextKeyValue()) {
          ++records;
          if ((double) kept / records < freq) {
            samples.add(ReflectionUtils.copy(job.getConfiguration(),
                                 reader.getCurrentKey(), null));
            ++kept;
          }
        }